* Session setup: disable -more- pauses, close any log that is still open, and
* start a fresh log file for this run.
set more off
* -cap- (capture) keeps the script going when there is no open log to close.
cap log close
log using log/data_construction, replace


**** BASELINE SURVEY**************

* Load the raw baseline survey and attach English variable labels.
* "\char36" and "\%" are LaTeX escapes for the dollar and percent signs; a literal
* "$" is avoided because Stata treats it as the global-macro prefix.
use src/base_encuesta_sim_v3, clear

label variable id "Identifier"
label variable fecha_acceso "Access Date"
* Fixed typo in the original label ("Assesored").
label variable monitor "Advised by monitor"
label variable duracion "Session duration (minutes)"
label variable consentimiento "Consent"
label variable es_afiliado "Affiliated"
label variable id_oficina_cha "Office ID"
label variable hasta_donde "Last step completed (considers only mandatory questions)"
label variable tipo_afiliado "Control or treatment"
label variable edad "Age"
label variable sexo "Sex"
label variable saldo_uf "Balance mandatory account (UF)"
label variable bono_uf "Bono (UF)"
label variable meses_cot_act "Months contributed"
label variable rem_actual "Wage (avg. \char36 last 6 months)"
label variable apv_sim "APV simulation"
* The original label carried a mis-encoded character after "N"; written in plain ASCII.
label variable num_sim "Simulation No. in same session"
label variable res_sim_1 "Estimated pension (\char36) simulation type 1 (normal)"
label variable res_sim_2 "Estimated pension (\char36) simulation type 2 (contributes 12 months)"
label variable res_sim_3 "Estimated pension (\char36) simulation type 3 (APV)"
label variable res_sim_4 "Estimated pension (\char36) simulation type 4 (delays 1 year)"
label variable edad_retiro "Retirement age"
label variable apv_t1 "APV simulated (age<35)"
label variable apv_t2 "APV simulated (35<=age<50(55))"
label variable apv_t3 "APV simulated (50(55)<=age<60(65))"
label variable meses_cot_t1 "Simulated contributed months (age<35)"
label variable meses_cot_t2 "Simulated contributed months (35<=age<50(55))"
label variable meses_cot_t3 "Simulated contributed months (50(55)<=age<60(65))"
label variable sexo_no_afi "Sex (non affiliated)"
label variable edad_no_afi "Age (non affiliated)"
label variable como_se_entero "How did you get to know about the module"
label variable situacion_laboral "Labor situation"
label variable nivel_educacional "Educational level"
label variable relacion_jefe_hogar "Relation with head of household"
label variable importancia_pension_afi "Importance of AFP pension in relation with other income sources"
label variable monto_ahorrado "Savings (M\char36) outside system"
label variable comodidad_dec_financieras "Ease with system (1-7)"
label variable como_calcula_pension "Knows how pension is computed"
label variable ptaje_ingreso_pension "\% of income discounted by AFP"
label variable quien_mas_rico "Friend receives \char36 1MM today and brother receives \char36 1MM in 3 years"
label variable ahorro_con_interes "100000 at 2\% for 5 years"
label variable ahorro_con_interes_inf "1\% annual interest rate versus 2\% inflation"
label variable deseada_pens "Desired pension (\char36)"
label variable esperada_pens "Expected pension (\char36)"
label variable ingresos_no_afi "Income (non affiliated)"

* Turn the access date from a "-"-separated string into a Stata daily date.
* The mdy() call below takes piece 2 as the month, piece 3 as the day and
* piece 1 as the year.
split fecha_acceso, parse(-)
destring fecha_acceso1 fecha_acceso2 fecha_acceso3, replace

* Replace the string variable with its numeric counterpart under the same name.
drop fecha_acceso
gen fecha_acceso = mdy(fecha_acceso2, fecha_acceso3, fecha_acceso1)
format fecha_acceso %tdDD/NN/CCYY
drop fecha_acceso1 fecha_acceso2 fecha_acceso3

* Keep the date (and N) right after the identifier.
order fecha_acceso N, after(id)

* Affiliation flag: 1 when es_afiliado is "S", 0 for any other value.
gen Afiliado = (es_afiliado == "S")
drop es_afiliado
label variable Afiliado "Affiliated"

* Rescale savings held outside the system (divide by 1000).
replace monto_ahorrado = monto_ahorrado/1000

* Consent: replace the string answer with a labelled 0/1 indicator of the same name.
gen consentimiento2 = (consentimiento == "S")
drop consentimiento
rename consentimiento2 consentimiento
order consentimiento, after(duracion)
label define consentimiento_label 0 "No" 1 "Si"
label values consentimiento consentimiento_label

* Treatment assignment: "T" -> 1, "C" -> 0, anything else stays missing.
gen Tratamiento = cond(tipo_afiliado == "T", 1, cond(tipo_afiliado == "C", 0, .))
label define Tratamiento_label 0 "Control" 1 "Tratado"
label values Tratamiento Tratamiento_label
label variable Tratamiento "Treatment assignment"
drop tipo_afiliado
order Tratamiento, after(hasta_donde)

* Wider display format for the monetary variables.
format %11.0g monto_ahorrado deseada_pens esperada_pens

* Value labels for the office identifier (codes 1-8).
* "San Miguel" is capitalised to match the label given to Oficina8 further down.
# delimit ;
label define id_oficina_cha_label 
1	"Alameda"
2	"Melipilla"
3	"Providencia"
4	"San Fernando"
5	"Estacion Central"
6	"XXX"
7	"Rancagua"
8	"San Miguel";
#delimit cr
label values id_oficina_cha id_oficina_cha_label

* Value labels for the categorical survey questions. Each definition is written
* as a single command continued with "///"; label texts are kept in Spanish.

* How the respondent found out about the module.
label define como_se_entero_label ///
    1 "Un amigo o alguien de mi familia me conto sobre" ///
    2 "Alguien de mi trabajo me conto sobre" ///
    3 "Un funcionario de IPS Chileatiende me conto sobre" ///
    4 "Lo vi al entrar a IPS Chile atiende" ///
    5 "Fui asesorado personalmente para ingresar al modulo"
label values como_se_entero como_se_entero_label

* Educational attainment.
label define nivel_educacional_label ///
    1 "Basica Incompleta" ///
    2 "Basica Completa" ///
    3 "Media Incompleta" ///
    4 "Media Completa" ///
    5 "Terciaria Incompleta" ///
    6 "Terciaria Completa" ///
    7 "Universitaria Incompleta" ///
    8 "Universitaria Completa" ///
    9 "Postgrado"
label values nivel_educacional nivel_educacional_label

* Relationship to the head of household.
label define relacion_jefe_hogar_label ///
    1 "Soy el (la) jefe(a) de hogar" ///
    2 "Soy esposo(a) o pareja del (de la) jefe(a) de hogar" ///
    3 "Soy hijo(a) del (de la) jefe(a) de hogar" ///
    4 "Soy padre o madre del (de la) jefe(a) de hogar" ///
    5 "Otro (hermano, primo, tio, etc.)" ///
    6 "Ninguna"
label values relacion_jefe_hogar relacion_jefe_hogar_label

* Importance of the AFP pension relative to other income sources.
label define importancia_pension_afi_label ///
    1 "Muy importante, no tengo otras fuentes de ingreso" ///
    2 "Importante, las otras fuentes de ingreso son menores" ///
    3 "Poco importante, tengo otras fuentes de ingreso de muy alto monto" ///
    4 "Nada importante, no afecta mi nivel de ingresos"
label values importancia_pension_afi importancia_pension_afi_label

* Belief about how the pension is computed.
label define como_calcula_pension_label ///
    1 "En base al salario de los ultimos 3 anios" ///
    2 "En base a las reglas que tiene cada AFP para calcular los montos de pension" ///
    3 "En base a las cotizaciones que usted ha hecho en su AFP" ///
    4 "No sabe"
label values como_calcula_pension como_calcula_pension_label

* Financial-literacy question: money today versus money in three years.
label define quien_mas_rico_label ///
    1 "Mi amigo" ///
    2 "Mi hermano" ///
    3 "Son igualmente ricos" ///
    4 "No sabe"
label values quien_mas_rico quien_mas_rico_label

* Financial-literacy question: compound interest.
label define ahorro_con_interes_label ///
    1 "Mas de \char36 102.000" ///
    2 "Exactamente  \char36 102.000" ///
    3 "Menos de  \char36 102.000" ///
    4 "No sabe"
label values ahorro_con_interes ahorro_con_interes_label

* Financial-literacy question: interest versus inflation.
label define ahorro_con_interes_inf_label ///
    1 "Mas que hoy" ///
    2 "Exactamente lo mismo que hoy" ///
    3 "Menos que hoy" ///
    4 "No sabe"
label values ahorro_con_interes_inf ahorro_con_interes_inf_label

* Value labels for the income brackets reported by non-affiliated respondents.
* The label for category 6 was mis-encoded in the original; it is restored in
* plain ASCII ("Mas de"), the spelling already used in ahorro_con_interes_label.
# delimit ;
label define ingresos_no_afi_label 
1 "\char36 0"
2 "Entre \char36 1 y \char36 199.999"
3 "Entre \char36 200.000 y \char36 299.999"
4 "Entre \char36 300.000 y \char36 399.999"
5 "Entre \char36 400.000 y \char36 499.999"
6 "Mas de \char36 500.000";
#delimit cr
label values ingresos_no_afi ingresos_no_afi_label

* Store the labelled baseline survey.
save data/Base_Encuesta, replace

*
********************************************************************************
* 2.GENERATION OF VARIABLES FOR BASELINE
********************************************************************************

use data/Base_Encuesta.dta, clear

* Age: fall back on the non-affiliate answer when the main field is empty, then
* blank out the value 5645 and any age below 15.
replace edad = edad_no_afi if edad == .
replace edad = . if edad == 5645 | edad < 15

* Sex: treat "." in the non-affiliate field as empty, use it to fill gaps in the
* main field, and build a female indicator ("F" -> 1, "M" -> 0, otherwise missing).
replace sexo_no_afi = "" if sexo_no_afi == "."
replace sexo = sexo_no_afi if sexo == ""
gen Mujer = cond(sexo == "F", 1, cond(sexo == "M", 0, .))
label variable Mujer "Female"

* Education: four mutually exclusive groups built from nivel_educacional codes
* (1-3, 4, 5-7, 8-9); left missing when the level is missing.
gen Educ1 = (nivel_educacional <= 3)         if nivel_educacional != .
gen Educ2 = (nivel_educacional == 4)         if nivel_educacional != .
gen Educ3 = inrange(nivel_educacional, 5, 7) if nivel_educacional != .
gen Educ4 = inlist(nivel_educacional, 8, 9)  if nivel_educacional != .

label variable Educ1 "Primary school"
label variable Educ2 "High school"
label variable Educ3 "Some post-secondary"
label variable Educ4 "University"

* Relationship to the head of household: 1 when the respondent is the head.
gen JefeHogar = (relacion_jefe_hogar == 1) if relacion_jefe_hogar != .
label variable JefeHogar "Head of household"

* Office: one indicator per office code.
forvalues office = 1/8 {
    gen Oficina`office' = (id_oficina_cha == `office') if id_oficina_cha != .
}
label variable Oficina1 "Alameda"
label variable Oficina2 "Melipilla"
label variable Oficina3 "Providencia"
label variable Oficina4 "San Fernando"
label variable Oficina5 "Estacion Central"
label variable Oficina6 "XXX"
label variable Oficina7 "Rancagua"
label variable Oficina8 "San Miguel"

* Importance of the AFP pension: answers 1 or 2 count as important.
* Non-affiliates with no answer are set to 0.
gen AFP_Importante = (importancia_pension_afi <= 2) if importancia_pension_afi != .
replace AFP_Importante = 0 if AFP_Importante == . & Afiliado == 0
label variable AFP_Importante "AFP important for retirement"

* Wage: numeric value assigned to each income bracket reported by non-affiliates
* (bracket 1 -> 0, brackets 2-5 -> 100000, 250000, 350000, 450000).
* NOTE(review): bracket 6 gets no numeric value here, so ingresos_no_afi_num stays
* missing for it and rem_actual ends up as 0 below - confirm this is intended.
gen ingresos_no_afi_num=0 if ingresos_no_afi==1
replace ingresos_no_afi_num=100000 if ingresos_no_afi==2
replace ingresos_no_afi_num=250000 if ingresos_no_afi==3
replace ingresos_no_afi_num=350000 if ingresos_no_afi==4
replace ingresos_no_afi_num=450000 if ingresos_no_afi==5

* Current wage by category, using the same cut-offs as the ingresos_no_afi brackets:
gen     rem_cat = 1 if rem_actual == 0
replace rem_cat = 2 if rem_actual >  0      & rem_actual < 200000
replace rem_cat = 3 if rem_actual >= 200000 & rem_actual < 300000
replace rem_cat = 4 if rem_actual >= 300000 & rem_actual < 400000
replace rem_cat = 5 if rem_actual >= 400000 & rem_actual < 500000
replace rem_cat = 6 if rem_actual >= 500000 & rem_actual != .

* When the wage is missing, fall back on the self-reported bracket.
replace rem_actual = ingresos_no_afi_num if rem_actual == .
replace rem_cat = ingresos_no_afi if rem_cat == .

* Whatever is still missing becomes a wage of 0 and a separate category 7.
replace rem_actual = 0 if rem_actual == .
replace rem_cat = 7 if rem_cat == . 

* Pension calculation: 1 when the respondent picks option 3 of como_calcula_pension
* ("En base a las cotizaciones que usted ha hecho en su AFP"); missing if unanswered.
gen CalculoPension = como_calcula_pension == 3 if como_calcula_pension ~=.
label var CalculoPension "Knows how are pensions calculated"

* Share of income discounted by the AFP: 1 when the answer lies between 10 and 12.
gen PctajeAFP = ptaje_ingreso_pension >= 10 & ptaje_ingreso_pension <= 12 if ptaje_ingreso_pension ~= . 
label var PctajeAFP "Knows \% of wage discounted"

* Financial knowledge: number of the three literacy questions answered with the
* option coded here as correct (codes 1, 1 and 3). Defined whenever at least one
* question was answered, so the score ranges from 0 to 3.
gen FinancKnow = (quien_mas_rico == 1) + (ahorro_con_interes == 1) + (ahorro_con_interes_inf == 3) if quien_mas_rico~=. | ahorro_con_interes ~= . | ahorro_con_interes_inf~= .
* The original label stated the range as 1-3, but a respondent can score 0.
label var FinancKnow "Financial knowledge score (0-3)"

* Non-affiliated respondents: balances, contributions and the baseline simulated
* pension are all set to zero.
foreach v of varlist saldo_uf bono_uf meses_cot_act apv_sim res_sim_1 {
    replace `v' = 0 if Afiliado == 0
}

* Gap between the simulated pension (res_sim_1) and the expected pension:
* raw, in absolute value, and relative to the sum of both amounts.
gen ErrorPension    = res_sim_1 - esperada_pens
gen ErrorPensionABS = abs(ErrorPension)
gen ErrorPensionRel = ErrorPension/(res_sim_1 + esperada_pens)
label variable ErrorPension    "Mistake in expected pension"
label variable ErrorPensionABS "Mistake (absolute value)"

* Ratio of the simulated pension to the current wage.
gen RepRate = res_sim_1/rem_actual

* Labor situation: situacion_laboral holds comma-separated option codes
* (e.g. "1,2,4"). Each indicator is 1 when the answer equals one of the code
* combinations listed for that option, 0 for any other non-empty answer, and
* missing when the answer is empty or ".".
gen situacion_laboral_p1 = inlist(situacion_laboral, "1", "1,2", "1,2,4", "1,3", "1,4", "1,5", "1,5,6", "1,6", "1,7") if !inlist(situacion_laboral, "", ".")
gen situacion_laboral_p2 = inlist(situacion_laboral, "1,2", "1,2,4", "2", "2,3", "2,3,4", "2,4", "2,5,6", "2,6") if !inlist(situacion_laboral, "", ".")
gen situacion_laboral_p3 = inlist(situacion_laboral, "1,3", "2,3", "2,3,4", "3", "3,4", "3,6") if !inlist(situacion_laboral, "", ".")
gen situacion_laboral_p4 = inlist(situacion_laboral, "1,2,4", "1,4", "2,3,4", "2,4", "3,4", "4", "4,6", "4,7") if !inlist(situacion_laboral, "", ".")
gen situacion_laboral_p5 = inlist(situacion_laboral, "1,5", "1,5,6", "2,5,6", "5", "5,6", "5,7") if !inlist(situacion_laboral, "", ".")
gen situacion_laboral_p6 = inlist(situacion_laboral, "1,5,6", "1,6", "2,5,6", "2,6", "3,6", "4,6", "5,6", "6", "6,7") if !inlist(situacion_laboral, "", ".")
gen situacion_laboral_p7 = inlist(situacion_laboral, "1,7", "4,7", "5,7", "6,7", "7") if !inlist(situacion_laboral, "", ".")

label variable situacion_laboral_p1 "Employed"
label variable situacion_laboral_p2 "Self employed"
label variable situacion_laboral_p3 "Looking for a job"
label variable situacion_laboral_p4 "Studying"
label variable situacion_laboral_p5 "Retired"
label variable situacion_laboral_p6 "Receiving pension"
label variable situacion_laboral_p7 "Not working and not looking for a job"

* Summary indicators. These are 0 (not missing) when the labor situation is unknown.
gen Working    = (situacion_laboral_p1 == 1 | situacion_laboral_p2 == 1)
gen LaborForce = (situacion_laboral_p1 == 1 | situacion_laboral_p2 == 1 | situacion_laboral_p3 == 1)
label variable Working    "Working"
label variable LaborForce "In labor force"

* Pensionable: the simulated pension is at least 70% of the current wage and at
* least 80% of 291778; left missing when res_sim_1 is missing.
gen Pensionable = (res_sim_1>=0.7*rem_actual & rem_actual!=.) & (res_sim_1>=0.8*291778) if res_sim_1!=. 
* Also pensionable by age: men aged 64+ and women aged 59+.
* Stata treats a missing value as larger than any number, so without the explicit
* "edad != ." guard every respondent with unknown age would be flagged here.
replace Pensionable = 1 if (edad>=64 & edad != . & Mujer == 0) | (edad>=59 & edad != . & Mujer == 1) 

* A missing bono is treated as zero.
replace bono_uf = 0 if bono_uf == .

label variable res_sim_1 "Simulated pension"

********************************************************************************

* Keep only valid observations: consent was not refused, the survey was completed,
* and a treatment arm was assigned.
keep if consentimiento != 0 & hasta_donde == "termina_encuesta" & Tratamiento != .

********************************************************************************
********************************************************************************
********************************************************************************

* Delete repeated simulations (they are exactly equal even though they appear as
* different attempts): within each ID1-ID2-result combination keep the first row
* after ordering by access date, num_sim and N.
bysort ID1 ID2 res_sim_* (fecha_acceso num_sim N), sort: keep if _n == 1 

* Individuals left with a single row and no simulation number get num_sim = 1.
bysort ID1: gen size = _N
replace num_sim = 1 if num_sim == . & size == 1

* For these observations num_sim is miscomputed because a later date appears
* first, so only the row with the hard-coded date is kept:
drop if ID1 == 2057 & fecha_acceso != date("20141210","YMD") 
drop if ID1 == 7700 & fecha_acceso != date("20140826","YMD") 
drop if ID1 == 8320 & fecha_acceso != date("20141203","YMD") 

* When an individual's lowest num_sim is above 1, mark the row with the lowest N as
* simulation 1.
* NOTE(review): min1 > 1 is also true when min1 is missing (every num_sim missing
* for that ID1) - confirm that case is meant to be covered.
bysort ID1: egen min1 = min(num_sim)
bysort ID1: egen min2 = min(N)
replace num_sim = 1 if min1 > 1 & N == min2 

* Only one observation per individual is wanted:
keep if num_sim == 1


* Heterogeneity by simulated pension: below versus at-or-above the median of res_sim_1.
summarize res_sim_1, detail
gen lowPension  = (res_sim_1 <  r(p50)) if res_sim_1 != .
gen highPension = (res_sim_1 >= r(p50)) if res_sim_1 != .

* Exposure period: the month of the date on which the survey was actually
* finished and the message received.
gen periodo_exp = ym(year(fecha_acceso), month(fecha_acceso))
format periodo_exp %tmMon_CCYY

* Status of non-affiliates (only those who never affiliated during the whole period).
* NOTE(review): no keep() is given, so rows found only in the using file are
* retained as well - verify against the contents of src/no_afi_estado.
merge m:1 ID1 ID2 using src/no_afi_estado, nogenerate

* People who were not randomized:
drop if edad >= 68 & edad != .

* None of these people could respond to the treatment and they should not have
* been randomized:
drop if inlist(estado, "antiguo_cai", "datos_incons", "fallecido", "pensionado")

* Build a one-row-per-person file with the pension application date while keeping
* the survey data in memory (preserve/restore).
preserve
* Pension application

use src/base_outcomes3_12m_v2.dta, clear
* collapse with no statistic specified takes the mean of fecsol_pen per person.
collapse fecsol_pen, by(ID1 ID2)
* fecsol_pen is decoded as a YYYYMMDD number: year, month and day are split out.
gen year_sol  = trunc(fecsol_pen/10000)
gen month_sol = trunc((fecsol_pen - year_sol*10000)/100)
gen day_sol   = fecsol_pen - year_sol*10000 - month_sol*100
* Day 32 is recoded to 31 (presumably a data-entry artifact - TODO confirm).
replace day_sol = 31 if day_sol == 32
gen date_sol  = mdy(month_sol,day_sol,year_sol)
format %td date_sol
drop fecsol_pen year_sol month_sol day_sol
save temp/Sol_pension, replace
restore

* Attach the application date to the survey; rows found only in the application
* file are discarded.
merge 1:1 ID1 ID2 using temp/Sol_pension
drop if _merge == 2

* Identify people who applied for a pension before the treatment and drop them:
gen Sol_Anterior = date_sol < fecha_acceso if date_sol != .
drop if Sol_Anterior == 1
drop _merge Sol_Anterior date_sol

* CHECK: ID1 2057 is removed entirely here, although one of its rows was
* deliberately kept earlier in this script.
drop if ID1 == 2057 

save data/Base_Encuesta_procesada, replace

*
********************************************************************************
* 3.- ADMINISTRATIVE DATA
********************************************************************************

* Load the monthly administrative outcomes and attach the processed survey.
use src/base_outcomes3_12m_v2.dta, clear

drop if ID1 == 5274

* The exposure period is taken from the survey file instead.
drop periodo_exp

* Rows with no survey counterpart are discarded; survey respondents that are
* absent from the administrative data are flagged as never affiliated.
merge m:1 ID1 ID2 using data/Base_Encuesta_procesada
drop if _merge == 1
gen NuncaAfiliado = (_merge == 2)
drop _merge

label variable ID1 "ID"
label variable ID2 "ID2"
label variable periodo_exp "Exposition date"
label variable n_per "Number of months after/before exposition"
label variable expuesto "Post"
label variable cot_obli "Mandatory savings (\char36)"
label variable cot_vol "Voluntary savings (\char36)"
label variable pensionado "Retired"
label variable cambio_fondo "Changed fund"
label variable cambio_afp "Changed AFP"
label variable clave_activa "Active password"
label variable fec_dev "Date"
label variable indep "Independent"
label variable Tratamiento "Treatment"

* fec_dev is decoded as a YYYYMM number and converted into a Stata monthly date.
gen date_dev = ym(trunc(fec_dev/100), fec_dev - trunc(fec_dev/100)*100)
format date_dev %tmMon_CCYY
label variable date_dev "Date"

drop fec_dev

////////////////////////////////////////////////////////////////////////////////

* Time index used later as the reshape key. Adding 13 makes every value positive:
* the exposure month becomes 13 and the twelve months before it become 1-12.
gen Months = date_dev - periodo_exp + 13

* expuesto is no longer needed: it can be recovered as Months >= 14.
drop expuesto

********************************************************************************

* Never-affiliated respondents:

* Create their monthly observations (zero contributions in every period unless
* updated later): 25 copies per person, numbered 1-25, without the exposure
* month (13), which leaves 12 months before and 12 months after.
expand 25 if NuncaAfiliado == 1
bysort ID1 ID2: gen ob = _n
drop if ob == 13 & NuncaAfiliado == 1
replace Months = ob if NuncaAfiliado == 1
drop ob

* Calendar month implied by the index, for Months 1-12 and 14-25.
replace date_dev = periodo_exp + Months - 13 if NuncaAfiliado == 1 & (inrange(Months, 1, 12) | inrange(Months, 14, 25))

* Prepare the records of respondents who affiliated after the survey in a
* temporary file, without losing the data currently in memory.
preserve
tempfile New_Affiliated
use src/datos_no_afi, clear
* fec_dev is decoded as YYYYMM and converted into a Stata monthly date.
gen year_dev  = trunc(fec_dev/100)
gen month_dev = fec_dev - year_dev*100
gen date_dev  = ym(year_dev,month_dev)
format %tmMon_CCYY date_dev
label var date_dev "Date"

* Same conversion for the affiliation date.
gen year_afil  = trunc(fec_afil/100)
gen month_afil = fec_afil-year_afil*100
gen date_afil  = ym(year_afil,month_afil)
format %tmMon_CCYY date_afil
label var date_afil "Affiliation Date"
* Variables that are not used:
drop fec_afil fec_dev year_dev month_dev year_afil month_afil
* Variables that already exist in the main dataset:
drop periodo_exp expuesto
save `New_Affiliated'
restore

* Zero for never-affiliated respondents (replaced in the merge below for those we
* have data on):
foreach var of varlist cot_obli cot_vol indep pensionado cambio_fondo cambio_afp clave_activa {
replace `var' = 0 if NuncaAfiliado == 1
}

* "update replace" lets values from the using file overwrite the master;
* _merge == 5 marks rows where a nonmissing master value was overwritten.
merge 1:1 ID1 ID2 date_dev using `New_Affiliated', update replace
replace Afiliado = 1 if _merge == 5
drop if _merge == 2 
* Spread the affiliation date to all rows of the person and flag as affiliated
* every month from that date onwards.
bysort ID1 ID2: egen date_afil_2 = max(date_afil)
replace Afiliado = 1 if date_afil_2 <= date_dev & date_afil_2 != . 
drop _merge date_afil date_afil_2

********************************************************************************

* Outcome variables used to measure impact.

* Log transforms, ln(1 + x), of monthly contributions.
gen ln_cot_obli = ln(1 + cot_obli)
gen ln_cot_vol  = ln(1 + cot_vol)
label variable ln_cot_obli "Mandatory savings (logs)"
label variable ln_cot_vol  "Voluntary savings (logs)"

* Inverse hyperbolic sine transforms, ln(x + sqrt(x^2 + 1)).
gen ihs_cot_obli = ln(cot_obli + sqrt(cot_obli^2 + 1))
gen ihs_cot_vol  = ln(cot_vol + sqrt(cot_vol^2 + 1))
label variable ihs_cot_obli "Mandatory savings (ihs)"
label variable ihs_cot_vol  "Voluntary savings (ihs)"

* Indicators for a positive contribution in the month.
gen Contributed = (cot_obli > 0 & cot_obli != .)
label variable Contributed "Contributed"

gen ContributedVol = (cot_vol > 0 & cot_vol != .)
label variable ContributedVol "Contributed (Voluntary)"

* Contributions relative to the baseline wage.
gen cot_obli_ing = cot_obli/rem_actual
gen cot_vol_ing  = cot_vol/rem_actual

* A missing retirement flag is treated as not retired.
replace pensionado = 0 if pensionado == .

* Contributed while not retired.
gen Cont_NoRet    = (Contributed    == 1 & pensionado == 0)
gen ContVol_NoRet = (ContributedVol == 1 & pensionado == 0)

* Total savings and their transforms.
gen savings    = cot_obli + cot_vol
gen ln_savings = ln(1 + savings)
label variable ln_savings "Total savings (logs)"

gen ihs_savings = ln(savings + sqrt(savings^2 + 1))
label variable ihs_savings "Total savings (ihs)"

* Mandatory/voluntary contribution shares:
gen Perc_CotVol   = cot_vol/rem_actual  // Should be 1%.
gen Perc_CotVol_2 = cot_vol/cot_obli    // Should be about 10 if it is the 1%.
gen Perc_CotObli  = cot_obli/rem_actual // Should be 10%.

* Controls:
gen ln_rem_actual = ln(1 + rem_actual)
gen no_rem_actual = (rem_actual == .)
replace ln_rem_actual = 0 if no_rem_actual == 1
xtile I_rem_actual = rem_actual, nquantiles(10)
gen ln_res_sim_1 = ln(1 + res_sim_1)

gen no_saldo_uf = (saldo_uf == .)
replace saldo_uf = 0 if no_saldo_uf == 1

* Self-employed before treatment, according to either the baseline survey
* (situacion_laboral_p2) or the administrative data (indep == 1 in Months == 12).
tempvar indep_m12 indep_m12_any
gen `indep_m12' = (indep == 1 & Months == 12)
bysort ID1 ID2: egen `indep_m12_any' = sum(`indep_m12')
gen indep_pre = (situacion_laboral_p2 == 1) | (`indep_m12_any' == 1)
drop `indep_m12' `indep_m12_any'

* Pension application date: fecsol_pen is decoded as a YYYYMMDD number.
gen year_sol  = trunc(fecsol_pen/10000)
gen month_sol = trunc((fecsol_pen - year_sol*10000)/100)
gen day_sol   = fecsol_pen - year_sol*10000 - month_sol*100
* Day 32 is recoded to 31 (presumably a data-entry artifact - TODO confirm).
replace day_sol = 31 if day_sol == 32
gen date_sol  = mdy(month_sol,day_sol,year_sol)
drop fecsol_pen year_sol month_sol day_sol

* Copy the (latest) application date to every monthly row of the person.
bysort ID1 ID2: egen date_sol2 = max(date_sol)
replace date_sol = date_sol2
format %tdDD/NN/CCYY date_sol
drop date_sol2

* Identify people who applied for a pension before the treatment and drop them:
gen Sol_Anterior = date_sol < fecha_acceso if date_sol != .
drop if Sol_Anterior == 1
drop Sol_Anterior
* These people were not meant to be part of the randomization; they were included
* only because the information was not available in time.

* Format change: make the pension type constant within person (maximum over rows).
bysort ID1: egen m_tipo_penvej = max(tipo_penvej)
replace tipo_penvej = m_tipo_penvej

* Recode the pension modality from text to a labelled numeric code, constant
* within person; the result replaces the original string variable mod_pen.
gen mod_pension = 1 if mod_pen == "Renta Temporal"
replace mod_pension = 2 if mod_pen == "Renta Vitalicia"
replace mod_pension = 3 if mod_pen == "Retiro Programado"
bysort ID1: egen m_mod_pension = max(mod_pension)
label define mod_pension_label 1 "Renta Temporal" 2 "Renta Vitalicia" 3	"Retiro Programado"
label values m_mod_pension mod_pension_label
drop mod_pen mod_pension m_tipo_penvej
rename m_mod_pension mod_pen

* Change pensionado to mean "retired in this period".
gen pensionado_acum = pensionado // This copy keeps the original "retired up to now" meaning.
* Within person, ordered by Months, flag the row where the running sum of
* pensionado first reaches 1.
by ID1 (Months), sort: gen byte first = sum(pensionado) == 1  & sum(pensionado[_n - 1]) == 0
replace pensionado = first
drop first

* Pension application date as a monthly date, and a flag for the month it falls in.
gen YM_sol = ym(year(date_sol),month(date_sol))
format %tmMon_CCYY YM_sol
gen Solicitud = YM_sol == date_dev // Coincides with the retirement date in almost all cases.
* Note: the ids with a retirement but no application requested the pension in the
* month of treatment.

* Contributed voluntarily in the period before exposure:
* the helper is 1/0 for months before the exposure month, 0 for months after it,
* and missing in the exposure month itself; ContVolPrevio is 1 when its sum is positive.
tempvar ContVolPost
gen     `ContVolPost' = (ContributedVol > 0) if date_dev < periodo_exp
replace `ContVolPost' = 0 if (date_dev > periodo_exp)
bysort ID1 ID2: egen ContVolPrevio = sum(`ContVolPost')
replace ContVolPrevio = ContVolPrevio > 0

* Variables for the balance table on administrative data: totals and counts of
* contributions computed separately for post == 0 and post == 1.
gen post = date_dev > periodo_exp
bysort ID1 post: egen TotCotVol = sum(cot_vol)
bysort ID1 post: egen TotCot    = sum(cot_obli)
bysort ID1 post: egen NCotVol   = sum(ContributedVol)
bysort ID1 post: egen NCot      = sum(Contributed)

foreach var of varlist TotCotVol TotCot NCotVol NCot {
replace `var' = . if post == 1 // Only the baseline (post == 0) values are wanted for the balance table.
* Declares a temporary variable whose macro name is the variable name followed by 1.
tempvar `var'1
bysort ID1: egen ``var'1' = max(`var') // Gives the same value to the pre- and post-treatment rows.
replace `var' = ``var'1'
}
drop post

* Express the totals in thousands.
replace TotCotVol = TotCotVol/1000
replace TotCot    = TotCot/1000

label var TotCotVol "Voluntary Cont. (M\char36)"
label var TotCot "Mandatory Cont. (M\char36)"
label var NCotVol "N Voluntary Cont."
label var NCot "N Mandatory Cont."
label var ContVolPrevio "Ever Contributed Vol."

********************************************************************************

* People who were not randomized: aged 68 or more, or with a status listed below.
drop if edad >= 68 & edad != .
drop if inlist(estado, "antiguo_cai", "datos_incons", "fallecido", "pensionado")

label variable Tratamiento "Personalized Info."

* Remove any temporary variables still in memory; capture ignores the error
* raised when there are none.
capture drop __00000*

* Age-range dummies (control): ten-year bands starting at 10.
egen edad_rango = cut(edad), at(10(10)90)
tabulate edad_rango, generate(edad_dummy_)

save data/Base_ADM_preparada_Long, replace

* Wide version of the panel: one row per person (ID1 ID2), with each listed
* monthly variable spread into one column per value of Months.
use data/Base_ADM_preparada_Long, clear

reshape wide date_dev cot_obli cot_vol ln_savings savings ln_cot_obli ln_cot_vol ihs_savings ihs_cot_obli ihs_cot_vol pensionado pensionado_acum ///
			 cambio_fondo cambio_afp clave_activa Contributed ContributedVol cot_obli_ing cot_vol_ing Cont_NoRet ContVol_NoRet indep Afiliado ///
			 Perc_CotVol Perc_CotVol_2 Perc_CotObli Solicitud, i(ID1 ID2) j(Months)
			 
save data/Base_ADM_preparada, replace


*
********************************************************************************
* 4.- DATA FINAL AMOUNTS:
********************************************************************************

* Two-period (pre/post) panel built from the long monthly data.
use data/Base_ADM_preparada_Long, clear

* post is 1 for months strictly after the exposure month.
gen post   = (date_dev > periodo_exp)
gen T_post = Tratamiento*post
label variable Tratamiento "Treatment"
label variable post        "Post"
label variable T_post      "Treatment*Post"

* Variables used when analysing heterogeneity:
global Heterogeneidad ErrorPension esperada_pens rem_actual indep_pre FinancKnow CalculoPension PctajeAFP RepRate Pensionable ContVolPrevio //edad

* Markers for the first occurrence of each distinct contribution amount within
* person and period (used to count different repetitions).
bysort ID1 post cot_vol:  gen cot_vol_dif  = 1 if _n == 1
bysort ID1 post cot_obli: gen cot_obli_dif = 1 if _n == 1

* Keep only people present in the simulation file.
merge m:1 ID1 ID2 using "src/sim_v2.dta"
keep if _merge == 3
drop _merge

* One row per person and period: sums of monthly outcomes, means of the
* covariates, and the first value of the remaining variables.
collapse ///
    (sum)   Afiliado savings TotCot=cot_obli NCot=Contributed TotCotVol=cot_vol NCotVol=ContributedVol ///
            cambio_fondo cambio_afp clave_activa indep pensionado ///
    (mean)  Tratamiento Mujer edad Educ1 Educ2 Educ3 Educ4 JefeHogar Working LaborForce rem_actual ///
            ContVolPrevio saldo_uf monto_ahorrado deseada_pens esperada_pens ErrorPensionRel AFP_Importante ///
            comodidad_dec_financieras CalculoPension PctajeAFP FinancKnow periodo_exp meses_cot_act ///
    (first) lowPension highPension ErrorPension res_sim_* sv2_pension*, ///
    by(ID1 post)

* Turn the summed flags into "ever in the period" indicators:
* - pensionado: only whether the person had retired by the end, not month by month;
* - clave_activa: once activated it should stay active;
* - cambio_afp: whether the AFP was ever changed, not the number of changes;
* - Afiliado: affiliated in at least one month.
foreach flag of varlist pensionado clave_activa cambio_afp Afiliado {
    replace `flag' = (`flag' > 0)
}

* Log and inverse-hyperbolic-sine transforms of the period totals.
gen ln_savings  = ln(1 + savings)
gen ln_cot_obli = ln(1 + TotCot)
gen ln_cot_vol  = ln(1 + TotCotVol)

gen ihs_savings  = ln(savings + sqrt(savings^2 + 1))
gen ihs_cot_obli = ln(TotCot + sqrt(TotCot^2 + 1))
gen ihs_cot_vol  = ln(TotCotVol + sqrt(TotCotVol^2 + 1))

* Interaction term and contribution-frequency indicators.
gen T_post        = Tratamiento*post
gen EverContVol   = (NCotVol > 0)
gen EverCont      = (NCot > 0)
gen AlwaysCont    = (NCot == 12)
gen AlwaysContVol = (NCotVol == 12)

gen Cont1         = (NCot == 1)
gen ContVol1      = (NCotVol == 1)
gen Cont_over1    = (NCot > 1)
gen ContVol_over1 = (NCotVol > 1)

* Variable labels for the collapsed panel.
label variable Afiliado      "Affiliated"
label variable NCotVol       "N. of Voluntary Cont."
label variable NCot          "N. of Mandatory Cont."
label variable ln_cot_vol    "Voluntary Savings (logs)"
label variable ln_cot_obli   "Mandatory Savings (logs)"
label variable ln_savings    "Total Savings (logs)"
label variable ihs_cot_vol   "Voluntary Savings (ihs)"
label variable ihs_cot_obli  "Mandatory Savings (ihs)"
label variable ihs_savings   "Total Savings (ihs)"
label variable pensionado    "Retired"
label variable cambio_fondo  "N. of Changes in Funds"
label variable cambio_afp    "Ever Changed AFP"
label variable clave_activa  "Active Password"
label variable Tratamiento   "Personalized Info."
label variable EverContVol   "Ever Contributed Voluntarily"
label variable EverCont      "Ever Contributed Mandatorily"
label variable AlwaysCont    "Cont. Mand. Every Month"
label variable AlwaysContVol "Cont. Vol. Every Month"

label variable Cont1         "Contributed Mandatorily Once"
label variable ContVol1      "Contributed Voluntarily Once"
label variable Cont_over1    "Contributed Mandatorily More Than Once"
label variable ContVol_over1 "Contributed Voluntarily More Than Once"
*****************************************************************

* Voluntary contributions relative to mandatory ones, and as a share of the total.
gen Perc_CotVol  = TotCotVol/TotCot
gen Perc_CotVol2 = TotCotVol/(TotCotVol + TotCot)
* High: ratio above 0.5 (and defined). Low: ratio at most 0.5 and different from zero.
gen Perc_VolHigh = (Perc_CotVol > 0.5 & Perc_CotVol != .)
gen Perc_VolLow  = (Perc_CotVol <= 0.5 & Perc_CotVol != 0)

label variable Perc_CotVol  "ContVol/ContMand"
label variable Perc_CotVol2 "ContVol/(ContVol+ContMand)"
label variable Perc_VolHigh "ContVol $ >$ 0.5*ContMand\%"
label variable Perc_VolLow  "0 $ <$ ContVol $ \leq$ 0.5*ContMand\%"

*****************************************************************

* Attach the projected pensions (pension_1 ... pension_7).
merge m:1 ID1 using src/efecto_pension_nvo2_6m, keepusing(pension_*)
* Rows found only in the using file are people already removed above
* (age, retirement, consent, etc.).
drop if _merge == 2
* Non-affiliated people necessarily have a projected pension of zero.
* (A disabled alternative in the original also zeroed newly affiliated people
* without enough data: _merge == 1 and Afiliado == 1.)
forvalues k = 1/7 {
    replace pension_`k' = 0 if Afiliado == 0
}
drop _merge

label variable pension_1 "No Changes"
label variable pension_2 "Permanent Change - Vol. Savings"
label variable pension_3 "Transitory Change - Vol. Savings"
label variable pension_4 "Permanent Change - Mand. Savings"
label variable pension_5 "Transitory Change - Mand. Savings"
label variable pension_6 "Permanent Change - All"
label variable pension_7 "Transitory Change - All"

global Pension pension_1 pension_2 pension_3 pension_4 pension_5 pension_6 pension_7

* Log, ln(1 + x), and inverse-hyperbolic-sine transforms of every projection.
foreach p of varlist $Pension {
    gen ln_`p'  = ln(1 + `p')
    gen ihs_`p' = ln(`p' + sqrt(`p'^2 + 1))
}

label variable ln_pension_1 "No Changes (logs)"
label variable ln_pension_2 "Vol. Savings (logs)"
label variable ln_pension_3 "Vol. Savings (logs)"
label variable ln_pension_4 "Mand. Savings (logs)"
label variable ln_pension_5 "Mand. Savings (logs)"
label variable ln_pension_6 "All Savings (logs)"
label variable ln_pension_7 "All Savings (logs)"

label variable ihs_pension_1 "No Changes (ihs)"
label variable ihs_pension_2 "Vol. Savings (ihs)"
label variable ihs_pension_3 "Vol. Savings (ihs)"
label variable ihs_pension_4 "Mand. Savings (ihs)"
label variable ihs_pension_5 "Mand. Savings (ihs)"
label variable ihs_pension_6 "All Savings (ihs)"
label variable ihs_pension_7 "All Savings (ihs)"

* pension_1 is the baseline for the other projections: in the pre period every
* scenario takes the pension_1 value.
forvalues k = 2/7 {
    replace ln_pension_`k'  = ln_pension_1  if post == 0
    replace ihs_pension_`k' = ihs_pension_1 if post == 0
}

* Declare the person-by-period panel structure and store the result.
xtset ID1 post

save data/Base_ADM_TotalFinal, replace

use "data/Base_ADM_preparada_Long.dta", clear

  * These four totals are rebuilt by the collapse below.
  drop NCotVol NCot TotCotVol TotCot

  * Distance between the observation date and the exposure period
  * (presumably in months -- confirm against the source data).
  generate diff = date_dev - periodo_exp

  * Four windows around exposure. diff == 0 and anything outside [-12, 12]
  * leave post missing.
  generate post = .
  replace  post = 1 if inrange(diff, -12, -6)
  replace  post = 2 if inrange(diff,  -5, -1)
  replace  post = 3 if inrange(diff,   1,  6)
  replace  post = 4 if inrange(diff,   7, 12)


  * Keep only rows that have a match in the simulation file.
  merge m:1 ID1 ID2 using "src/sim_v2.dta", keep(match)
  drop _merge


  * Variables used when analysing heterogeneity:
  * one row per ID1 x post with sums, means and first values.
  collapse (sum)   Afiliado savings TotCot=cot_obli NCot=Contributed           ///
                   TotCotVol=cot_vol NCotVol=ContributedVol cambio_fondo       ///
                   cambio_afp clave_activa indep pensionado                    ///
           (mean)  Tratamiento Mujer edad Educ1 Educ2 Educ3 Educ4 JefeHogar    ///
                   Working LaborForce rem_actual ContVolPrevio saldo_uf        ///
                   monto_ahorrado deseada_pens esperada_pens ErrorPensionRel   ///
                   AFP_Importante comodidad_dec_financieras CalculoPension     ///
                   PctajeAFP FinancKnow periodo_exp meses_cot_act              ///
           (first) lowPension highPension ErrorPension res_sim_* sv2_pension*, ///
           by(ID1 post)

  xtset ID1 post

  * Plain logs (no +1 shift) of the simulated pensions.
  forvalues k = 2/4 {
      generate ln_res_sim_`k' = ln(res_sim_`k')
  }
  generate ln_pen      = ln(sv2_pension)
  generate ln_pen_dens = ln(sv2_pension_dens)
  generate ln_pen_apv  = ln(sv2_pension_apv)
  generate ln_pen_jub  = ln(sv2_pension_jub)

  * Turn the collapsed sums into 0/1 indicators ("ever within the window").
  * Original notes: pensionado only records whether the person was retired at
  * the end, not month by month; once clave_activa is switched on it should stay
  * on; cambio_afp could instead be kept as a count of changes if that is wanted.
  foreach flag of varlist pensionado clave_activa cambio_afp Afiliado {
      replace `flag' = (`flag' > 0)
  }

  * ln(1 + x) of the savings aggregates.
  generate ln_savings  = ln(savings + 1)
  generate ln_cot_obli = ln(TotCot + 1)
  generate ln_cot_vol  = ln(TotCotVol + 1)

  * Inverse hyperbolic sine of the same aggregates.
  generate ihs_savings  = ln(savings + sqrt(savings^2 + 1))
  generate ihs_cot_obli = ln(TotCot + sqrt(TotCot^2 + 1))
  generate ihs_cot_vol  = ln(TotCotVol + sqrt(TotCotVol^2 + 1))

  * Treatment x period interaction and contribution-frequency indicators.
  * NOTE(review): the "always" indicators test for exactly 6 contributions, but
  * the post windows defined for this dataset span 7, 5, 6 and 6 months --
  * confirm that 6 is the intended threshold for every window.
  generate T_post        = Tratamiento*post
  generate EverContVol   = (NCotVol > 0)
  generate EverCont      = (NCot > 0)
  generate AlwaysCont    = (NCot == 6)
  generate AlwaysContVol = (NCotVol == 6)

  generate Cont1         = (NCot == 1)
  generate ContVol1      = (NCotVol == 1)
  generate Cont_over1    = (NCot > 1)
  generate ContVol_over1 = (NCotVol > 1)

  * Variable labels for the collapsed panel.
  label variable Afiliado      "Affiliated"
  label variable NCotVol       "N. of Voluntary Cont."
  label variable NCot          "N. of Mandatory Cont."
  label variable ln_cot_vol    "Voluntary Savings (logs)"
  label variable ln_cot_obli   "Mandatory Savings (logs)"
  label variable ln_savings    "Total Savings (logs)"
  label variable ihs_cot_vol   "Voluntary Savings (ihs)"
  label variable ihs_cot_obli  "Mandatory Savings (ihs)"
  label variable ihs_savings   "Total Savings (ihs)"
  label variable pensionado    "Retired"
  label variable cambio_fondo  "N. of Changes in Funds"
  label variable cambio_afp    "Ever Changed AFP"
  label variable clave_activa  "Active Password"
  label variable Tratamiento   "Personalized Info."
  label variable EverContVol   "Ever Contributed Voluntarily"
  label variable EverCont      "Ever Contributed Mandatorily"
  label variable AlwaysCont    "Cont. Mand. Every Month"
  label variable AlwaysContVol "Cont. Vol. Every Month"

  * Contribution-count indicators.
  label variable Cont1         "Contributed Mandatorily Once"
  label variable ContVol1      "Contributed Voluntarily Once"
  label variable Cont_over1    "Contributed Mandatorily More Than Once"
  label variable ContVol_over1 "Contributed Voluntarily More Than Once"

  *****************************************************************

  * Voluntary contributions relative to mandatory ones.
  * Perc_CotVol is missing when TotCot is zero; Perc_CotVol2 uses the total as base.
  gen Perc_CotVol  = TotCotVol/TotCot
  gen Perc_CotVol2 = TotCotVol/(TotCotVol+TotCot)
  * High: ratio above 0.5 (missing ratios excluded explicitly).
  gen Perc_VolHigh = Perc_CotVol > 0.5 & Perc_CotVol != .
  * Low: ratio at most 0.5 and not zero (a missing ratio fails "<= 0.5" in Stata).
  gen Perc_VolLow  = Perc_CotVol <= 0.5 & Perc_CotVol != 0

  * The last two labels carry LaTeX markup. The escapes must be backslashes
  * (\%, \leq), as in the equivalent Perc_VolLow label used for the other
  * dataset in this file; the forward-slash variants rendered as literal text.
  label var Perc_CotVol  "ContVol/ContMand"
  label var Perc_CotVol2 "ContVol/(ContVol+ContMand)"
  label var Perc_VolHigh "ContVol $ >$ 0.5*ContMand\%"
  label var Perc_VolLow  "0 $ <$ ContVol $ \leq$ 0.5*ContMand\%"


  save "data/Base_ADM_TotalFinal_4Rangos_sim_pen.dta", replace
  
  
****ENDLINE SURVEY DATA***********
  
  * Cohort 1: phone survey, August-September 2014.
  use "src/Encuesta Telefonica ago sept 2014.dta", clear

* ID check: compare each ID with its first verification copy. The summary is
* only inspected in the log; nothing is enforced here.
gen diff_id1 = id1 - verificaci_n_id1
gen diff_id2 = id2 - verificaci_n_id2
sum diff_id*

* Same check against the second verification copy (suffix _01).
gen diff_id1_2 = id1 - verificaci_n_id1_01
gen diff_id2_2 = id2 - verificaci_n_id2_01
sum diff_id*_2

* The two wildcards already cover the _01 and _2 variants.
drop verificaci_n_id* diff_id*

* Data-entry / navigation variables that are not used in the analysis.
drop admin_start_time admin_dig_name of_chileat inicio_llamado admin_siguiente pag_2_siguiente_p_gina pag_3_siguiente_p_gina p_g_4_siguiente_p_gina p_g_5_siguiente_p_gina ///
     p_g_6_siguiente_p_gina p_g_7_siguiente_p_gina p_g_8_siguiente_p_gina fin_del_llamado hora_de_t_rmino_de_digitaci_n terminar_la_digitaci_n terminar_la_digitaci_n_01

* Survey wave identifier. The label is kept ASCII-only: the accented character
* it used to contain was corrupted by the file's encoding.
gen Cohorte = 1
label var Cohorte "Tanda de encuestas (segun mes)"

save "data/Encuesta_AgostoSeptiembre2014", replace


* Cohort 2: phone survey, October-November 2014.
use "src/Encuesta Telefonica oct nov 2014.dta", clear

* ID check: difference between each ID and its verification copy,
* summarized for inspection in the log, then discarded.
forvalues k = 1/2 {
    generate diff_id`k' = id`k' - verificaci_n_id`k'
}
summarize diff_id*
drop diff_id1 diff_id2 verificaci_n_id1 verificaci_n_id2

* Variables that cannot be used in the analysis (data-entry / navigation).
drop admin_start_time admin_dig_name of_chileat inicio_llamado admin_siguiente ///
     pag_2_siguiente_p_gina pag_3_siguiente_p_gina                            ///
     p_g_4_siguiente_p_gina p_g_5_siguiente_p_gina p_g_6_siguiente_p_gina     ///
     p_g_7_siguiente_p_gina p_g_8_siguiente_p_gina                            ///
     fin_del_llamado hora_de_t_rmino_de_digitaci_n terminar_la_digitaci_n

* Convert p5_s2 to numeric.
destring p5_s2, replace

* Survey wave identifier.
generate Cohorte = 2

save "data/Encuesta_OctubreNoviembre2014", replace


* Cohort 3: phone survey, December 2014.
use "src/Encuesta Telefonica dic 2014.dta", clear

* Verify IDs: difference between each ID and its verification copy, summarized
* for inspection in the log; the helper variables are then dropped.
gen diff_id1 = id1 - verificaci_n_id1 
gen diff_id2 = id2 - verificaci_n_id2 
sum diff_id* 
drop diff_id1 diff_id2 verificaci_n_id1 verificaci_n_id2

* Data-entry / navigation variables that are not used afterwards.
drop of_chileat admin_start_time admin_dig_name inicio_llamado admin_siguiente pag_2_siguiente_p_gina pag_3_siguiente_p_gina p_g_4_siguiente_p_gina p_g_5_siguiente_p_gina ///
     p_g_6_siguiente_p_gina p_g_7_siguiente_p_gina p_g_8_siguiente_p_gina fin_del_llamado hora_de_t_rmino_de_digitaci_n terminar_la_digitaci_n

* Manual clean-up of free-text answers in p5_s2 before converting to numeric:
* blank out non-answers, strip percent signs, map the range "12-13" to 12,5,
* and finally switch decimal commas to decimal points.
replace p5_s2 = "" if p5_s2 == "no sa"
replace p5_s2 = "" if p5_s2 == "�"
replace p5_s2 = "19" if p5_s2 == "19%"
replace p5_s2 = "12" if p5_s2 == "12%"
replace p5_s2 = "12,5" if p5_s2 == "12-13"
replace p5_s2 = subinstr(p5_s2,",",".",.)

destring p5_s2, replace force // force is needed: one observation holds an unrecognized character that could not be replaced above, so it becomes missing.

* Survey wave identifier.
gen Cohorte = 3
 
save "data/Encuesta_Diciembre2014", replace


* Cohort 3 (extra contacts): phone survey, December 2014.
use "src/Encuesta Telefonica dic 2014_contactos extra.dta", clear

* ID check: difference between each ID and its verification copy,
* summarized for inspection in the log, then discarded.
forvalues k = 1/2 {
    generate diff_id`k' = id`k' - verificaci_n_id`k'
}
summarize diff_id*
drop diff_id1 diff_id2 verificaci_n_id1 verificaci_n_id2

* Data-entry / navigation variables that are not used afterwards.
* NOTE(review): unlike the other waves this file also drops admin_star_date,
* which the pooled survey-date construction reads later in this do-file; rows
* from this file will presumably end up without a survey date -- confirm.
drop admin_star_date of_chileat admin_start_time admin_dig_name inicio_llamado ///
     admin_siguiente pag_2_siguiente_p_gina pag_3_siguiente_p_gina            ///
     p_g_4_siguiente_p_gina p_g_5_siguiente_p_gina p_g_6_siguiente_p_gina     ///
     p_g_7_siguiente_p_gina p_g_8_siguiente_p_gina                            ///
     fin_del_llamado hora_de_t_rmino_de_digitaci_n terminar_la_digitaci_n

* Decimal commas -> decimal points, then convert p5_s2 to numeric.
replace p5_s2 = subinstr(p5_s2, ",", ".", .)
destring p5_s2, replace

* Survey wave identifier (same wave as the main December file).
generate Cohorte = 3

save "data/Encuesta_Diciembre2014_Extra", replace


* Cohort 4: phone survey, January 2015 (old data-entry mask).
use "src/Encuesta Telefonica ene 2015 - Mascara antigua.dta", clear

* ID check: difference between each ID and its verification copy,
* summarized for inspection in the log, then discarded.
forvalues k = 1/2 {
    generate diff_id`k' = id`k' - verificaci_n_id`k'
}
summarize diff_id*
drop diff_id1 diff_id2 verificaci_n_id1 verificaci_n_id2

* Data-entry / navigation variables that are not used afterwards.
drop of_chileat admin_start_time admin_dig_name inicio_llamado admin_siguiente ///
     pag_2_siguiente_p_gina pag_3_siguiente_p_gina                            ///
     p_g_4_siguiente_p_gina p_g_5_siguiente_p_gina p_g_6_siguiente_p_gina     ///
     p_g_7_siguiente_p_gina p_g_8_siguiente_p_gina                            ///
     fin_del_llamado hora_de_t_rmino_de_digitaci_n terminar_la_digitaci_n

* Decimal commas -> decimal points, then convert p5_s2 to numeric.
replace p5_s2 = subinstr(p5_s2, ",", ".", .)
destring p5_s2, replace

* Survey wave identifier.
generate Cohorte = 4

save "data/Encuesta_Enero2015_Antigua", replace


* Cohort 5: version-2 data-entry mask (January-February).
use "src/Datos version 2 mascara ene-feb-msje.dta", clear

* ID check: difference between each ID and its verification copy,
* summarized for inspection in the log, then discarded.
forvalues k = 1/2 {
    generate diff_id`k' = id`k' - verificaci_n_id`k'
}
summarize diff_id*
drop diff_id1 diff_id2 verificaci_n_id1 verificaci_n_id2

* Data-entry / navigation variables that are not used afterwards.
drop of_chileat admin_start_time admin_dig_name inicio_llamado admin_siguiente ///
     pag_2_siguiente_p_gina pag_3_siguiente_p_gina                            ///
     p_g_4_siguiente_p_gina p_g_5_siguiente_p_gina p_g_6_siguiente_p_gina     ///
     p_g_7_siguiente_p_gina p_g_8_siguiente_p_gina                            ///
     fin_del_llamado hora_de_t_rmino_de_digitaci_n terminar_la_digitaci_n

* Decimal commas -> decimal points, then convert p5_s2 to numeric.
replace p5_s2 = subinstr(p5_s2, ",", ".", .)
destring p5_s2, replace

* Survey wave identifier.
generate Cohorte = 5

save "data/Encuesta_Enero2015_Nueva", replace


* Stack all survey waves, starting from the December file. Value labels of the
* appended files are not copied (nolabel).
use "data/Encuesta_Diciembre2014", clear
foreach wave in Encuesta_Enero2015_Nueva       ///
                Encuesta_Enero2015_Antigua     ///
                Encuesta_Diciembre2014_Extra   ///
                Encuesta_OctubreNoviembre2014  ///
                Encuesta_AgostoSeptiembre2014 {
    append using "data/`wave'", nolabel
}

* Drop respondents without consent (anything other than 1, including missing).
keep if consentimiento_para_participar_e == 1

* Match the identifier names used by the other datasets.
rename (id1 id2) (ID1 ID2)

* Build the interview date from the text field admin_star_date, read by fixed
* position: day in characters 1-2, month in 4-5, year in 7-10.
gen survey_day   = substr(admin_star_date,1,2)
gen survey_month = substr(admin_star_date,4,2)
gen survey_year  = substr(admin_star_date,7,4)
destring survey_day survey_month survey_year, replace
gen survey_date = mdy(survey_month, survey_day, survey_year)
format %tdMon_DD,_CCYY survey_date
* Label corrected: the variable holds a date ("Survey Date", not "Survey Data").
label variable survey_date "Survey Date"
* The text field and the three helper pieces are no longer needed.
drop admin_star_date survey_day survey_month survey_year

* Repeated observations: some respondents were surveyed twice, so only the
* earliest interview is kept. Only pairs (exactly two rows per ID) are handled.
bysort ID1 ID2: generate reps = _N
bysort ID1 ID2: egen firstdate = min(survey_date)
drop if survey_date != firstdate & reps == 2   // original run: 2 observations deleted
drop firstdate reps

* Merge with the baseline survey / treatment data. Seguimiento flags
* respondents found in both the follow-up and the baseline files.
merge 1:1 ID1 ID2 using "data/Base_Encuesta_procesada"
generate Seguimiento = (_merge == 3)
drop _merge

* Merge the self-employment status and contribution aggregates (the original
* note says the status is only available in the outcomes data).
merge 1:1 ID1 ID2 using "data/Base_ADM_preparada", nogenerate                 ///
      keepusing(indep_pre ContVolPrevio no_rem_actual no_saldo_uf             ///
                TotCotVol TotCot NCotVol NCot)

* Overwrite with the baseline-survey definition of self-employed.
replace indep_pre = (situacion_laboral_p2 == 1)

* Recode yes/no items from 1/2 to 1/0.
foreach q of varlist p1_s5 p3_s2 p5_s3 p6_s2 p1_s1 p2_s6 {
    replace `q' = 0 if `q' == 2
}
* In the first cohort the format was 1/2, so a recorded 0 cannot be a valid
* answer; it is blanked before the 2 -> 0 recode.
replace p4_s4 = . if Cohorte == 1 & p4_s4 == 0
replace p4_s4 = 0 if p4_s4 == 2
* NOTE(review): no reason is given in the source for blanking p1_s4 == 1;
* presumably a header item like p3_s5 / p7_s5 below -- confirm.
replace p1_s4 = . if p1_s4 == 1

* Manual fixes for a few cases whose codes do not match the possible options.
replace p1_2_s4 = . if p1_2_s4 == 8             // yes/no item, so 8 is impossible
replace p2_s6   = . if inlist(p2_s6, 5, 8, 9)
replace p5_s4   = . if p5_s4 == 0
replace p6_1_s5 = . if p6_1_s5 == 3
replace p7_s2   = . if p7_s2 == 1               // a retirement age of 1 year is read as a way of skipping the question
foreach q of varlist p3_s5 p7_s5 {
    replace `q' = . if `q' == 1                 // these items are not answered
}

* Items whose missing values are treated differently in the last cohort.
* Any remaining code 2 is mapped to 0.
* NOTE(review): the list has p3_9_s5 but not p3_8_s5, while p3_8_s5 is the item
* used elsewhere in this file -- confirm that p3_9_s5 is intended.
local OpcionMultiple p3_1_s1 p3_2_s1 p3_3_s1 p3_4_s1 p3_5_s1                 ///
                     p1_1_s3 p1_2_s3 p1_3_s3 p1_4_s3 p1_5_s3                 ///
                     p2_1_s3 p2_2_s3 p2_3_s3 p2_4_s3 p2_5_s3                 ///
                     p3_1_s3 p3_2_s3 p3_3_s3 p3_4_sa p3_4_s3                 ///
                     p4_1_s3 p4_2_s3 p4_3_s3 p4_4_s3 p4_5_s3 p4_6_s3         ///
                     p5_s3                                                   ///
                     p1_1_s4 p1_2_s4 p1_3_s4 p1_4_s4 p1_5_s4 p1_6_s4 p1_7_s4 ///
                     p3_1_s5 p3_2_s5 p3_3_s5 p3_4_s5 p3_5_s5 p3_6_s5 p3_7_s5 ///
                     p3_9_s5                                                 ///
                     p7_1_s5 p7_2_s5 p7_3_s5 p7_4_s5 p7_5_s5

foreach item of varlist `OpcionMultiple' {
    replace `item' = 0 if `item' == 2
}

* Fix multiple-choice questions: when a respondent ticked at least one option
* of a question, the options left blank are filled with zero.
* Per the original note this is not needed in the last cohort, only in the
* earlier ones.
* Each local below holds the option variables of one question.
local p3_s1 p3_1_s1 p3_2_s1 p3_3_s1 p3_4_s1 p3_5_s1
local p1_s3 p1_1_s3 p1_2_s3 p1_3_s3 p1_4_s3 p1_5_s3
local p2_s3 p2_1_s3 p2_2_s3 p2_3_s3 p2_4_s3 p2_5_s3
local p3_s3 p3_1_s3 p3_2_s3 p3_3_s3 p3_4_s3 p3_4_sa
local p4_s3 p4_1_s3 p4_2_s3 p4_3_s3 p4_4_s3 p4_5_s3 p4_6_s3
local p1_s4 p1_1_s4 p1_2_s4 p1_3_s4 p1_4_s4 p1_5_s4 p1_6_s4 p1_7_s4
local p3_s5 p3_1_s5 p3_2_s5 p3_3_s5 p3_4_s5 p3_5_s5 p3_6_s5 p3_7_s5 p3_8_s5
local p6_s5 p6_1_s5 p6_2_s5 p6_3_s5
local p7_s5 p7_1_s5 p7_2_s5 p7_3_s5 p7_4_s5 p7_5_s5

* Names of the locals defined above (these are macro names, not variables).
local preguntas_opciones p3_s1 p1_s3 p2_s3 p3_s3 p4_s3 p1_s4 p3_s5 p6_s5 p7_s5

* Iterate over the macro NAMES with "of local". The previous "of varlist" form
* only worked when a variable with the same name as each macro happened to
* exist in the data.
foreach lista of local preguntas_opciones {

    * Number of options of this question with a non-missing value.
    egen NonMiss = rownonmiss(``lista'')

    foreach var of varlist ``lista'' {
        replace `var' = 0 if `var' == . & NonMiss != 0
    }
    drop NonMiss
}

* Missing values where we know the question was answered: condicion_1 == 1 or
* condicion_2 == 1 means the respondent did answer, so a blank option is a zero.
foreach item of varlist p1_1_s3 p1_2_s3 p1_3_s3 p1_4_s3 p1_5_s3 ///
                        p2_1_s3 p2_2_s3 p2_3_s3 p2_4_s3 p2_5_s3 {
    replace `item' = 0 if `item' == . & (condicion_1 == 1 | condicion_2 == 1)
}

* Respondents with p1_s1 == 0 are assigned code 4 in p2_s1, and for anyone
* with p2_s1 == 4 all p3_*_s1 options are set to zero.
replace p2_s1 = 4 if p1_s1 == 0
foreach item of varlist p3_1_s1 p3_2_s1 p3_3_s1 p3_4_s1 p3_5_s1 {
    replace `item' = 0 if p1_s1 == 0 | p2_s1 == 4
}

* Within p5_s2 / p6_s2 / p7_s2: a blank becomes zero when either of the other
* two has a value. The statements run in this order on purpose, since each one
* sees the fills made by the previous one.
replace p5_s2 = 0 if p5_s2 == . & (p6_s2 != . | p7_s2 != .)
replace p6_s2 = 0 if p6_s2 == . & (p5_s2 != . | p7_s2 != .)
replace p7_s2 = 0 if p7_s2 == . & (p5_s2 != . | p6_s2 != .)

* Creation of additional variables.

* One dummy per answer code (1-4) of p2_s1; missing when p2_s1 is missing.
local code = 0
foreach stub in contrib aument simul nosabe {
    local ++code
    generate p2_s1_`stub' = (p2_s1 == `code') if p2_s1 != .
}
order p2_s1_contrib p2_s1_aument p2_s1_simul p2_s1_nosabe, after(p2_s1)

* Same for p4_s2; a dummy that is still missing is set to zero when both p3_s2
* and p2_s2 have a value.
local code = 0
foreach stub in last3 rules contr other {
    local ++code
    generate p4_s2_`stub' = (p4_s2 == `code') if p4_s2 != .
    replace  p4_s2_`stub' = 0 if p4_s2_`stub' == . & p3_s2 != . & p2_s2 != .
}
order p4_s2_last3 p4_s2_rules p4_s2_contr p4_s2_other, after(p4_s2)

* Correct answer to the contribution-rate question.
* NOTE(review): the original comment says a wider range is accepted in case
* respondents include commissions, yet both bounds are 10, so only exactly 10
* counts as correct -- confirm the intended upper bound.
generate p5_s2_correct = inrange(p5_s2, 10, 10) if p5_s2 != .
order p5_s2_correct, after(p5_s2)

* Correct answer to the retirement-age question: 60 when sexo is "F", 65 when "M".
generate p7_s2_correct = ((sexo == "F" & p7_s2 == 60) | (sexo == "M" & p7_s2 == 65)) if p7_s2 != .
order p7_s2_correct, after(p7_s2)

* Affiliation (code 1) and disaffiliation (code 3) dummies for p1_1_s3 and p2_1_s3.
foreach q in p1_1_s3 p2_1_s3 {
    generate `q'_afilia = (`q' == 1) if `q' != .
    generate `q'_desafi = (`q' == 3) if `q' != .
    order `q'_afilia `q'_desafi, after(`q')
}

* Working at follow-up: either p1_1_s4 or p1_2_s4 equals 1.
generate WorkingSEG = (p1_1_s4 == 1 | p1_2_s4 == 1) if p1_1_s4 != .
order WorkingSEG, after(p1_7_s4)

* Non-workers get zero main income and zero in p4_s4; additional income is set
* to zero when it is blank but main income is known.
replace p2_s4 = 0 if WorkingSEG == 0 & p2_s4 == .
replace p3_s4 = 0 if p2_s4 != . & p3_s4 == .
replace p4_s4 = 0 if WorkingSEG == 0
generate SavingsRate = p4_s5/p2_s4/12

* Health-insurance dummies from p5_s4 (code 1, code 2, either).
generate p5_s4_fonasa = (p5_s4 == 1) if p5_s4 != .
generate p5_s4_isapre = (p5_s4 == 2) if p5_s4 != .
generate p5_s4_salud  = inlist(p5_s4, 1, 2) if p5_s4 != .
order p5_s4_fonasa p5_s4_isapre p5_s4_salud, after(p5_s4)

* One dummy per answer code (1-4) of p5_s5, plus grouped versions (1-2 and 3-4).
forvalues k = 1/4 {
    generate p5_s5_`k' = (p5_s5 == `k') if p5_s5 != .
}
generate p5_s5_import  = inlist(p5_s5, 1, 2) if p5_s5 != .
generate p5_s5_pocoimp = inlist(p5_s5, 3, 4) if p5_s5 != .
order p5_s5_1 p5_s5_2 p5_s5_3 p5_s5_4 p5_s5_import p5_s5_pocoimp, after(p5_s5)

* Log transforms: p2_s5 is shifted by one, p4_s5 is not.
generate ln_p2_s5 = ln(p2_s5 + 1)
order ln_p2_s5, after(p2_s5)

generate ln_p4_s5 = ln(p4_s5)
order ln_p4_s5, after(p4_s5)

* p1_s5 asks whether the respondent has other savings; when the answer is no,
* the blank p3_*_s5 items are zeros.
foreach item of varlist p3_*_s5 {
    replace `item' = 0 if `item' == . & p1_s5 == 0
}

* A missing log amount becomes zero whenever p1_s5 was answered.
replace ln_p2_s5 = 0 if ln_p2_s5 == . & p1_s5 != .

* Cap ln_p2_s5 at its 99th percentile.
summarize ln_p2_s5, detail
replace ln_p2_s5 = r(p99) if ln_p2_s5 > r(p99) & ln_p2_s5 != .

* Spread the single-answer p6_s5 into its three option dummies, then zero-fill
* an option when at least one of the other two has a value (order matters).
forvalues k = 1/3 {
    replace p6_`k'_s5 = 1 if p6_s5 == `k'
}
replace p6_1_s5 = 0 if p6_1_s5 == . & (p6_2_s5 != . | p6_3_s5 != .)
replace p6_2_s5 = 0 if p6_2_s5 == . & (p6_1_s5 != . | p6_3_s5 != .)
replace p6_3_s5 = 0 if p6_3_s5 == . & (p6_1_s5 != . | p6_2_s5 != .)


* Labels, section 1 of the questionnaire (variables ending in _s1).
label variable p1_s1         "Module recall"
label variable p2_s1_contrib "Pensions, wages, etc (general)"
label variable p2_s1_aument  "How to increase pension"
label variable p2_s1_simul   "Module with alternatives to inc. pension"
label variable p2_s1_nosabe  "Does not remember"
label variable p3_1_s1       "Variation in voluntary savings"
label variable p3_2_s1       "Variation in retirement age"
label variable p3_3_s1       "Variation in contributions freq."
label variable p3_4_s1       "Personalized information"
label variable p3_5_s1       "Other information"
label variable p4_s1         "Valuation of info received (1-7)"

* Labels, section 2 (variables ending in _s2).
label variable p1_s2         "Pensions system knowledge (1-7)"
label variable p2_s2         "Relevance of being informed (1-7)"
label variable p3_s2         "Informed about system (last 10 months)"
label variable p4_s2         "How are pensions calculated?"
label variable p4_s2_last3   "Based on last 3 years wage"
label variable p4_s2_rules   "Rules set by each administrator"
label variable p4_s2_contr   "Knows how are pensions calculated"
label variable p4_s2_other   "Other"
label variable p5_s2_correct "Knows \% discounted by AFP"
label variable p6_s2         "Understands voluntary savings (APV)"
label variable p7_s2_correct "Knows retirement age"

* Labels, section 3 (variables ending in _s3).
label variable p1_1_s3_afilia "Affiliating to AFP"
label variable p1_1_s3_desafi "Disaffiliating from AFP"
label variable p1_2_s3        "Initializing/increasing voluntary savings"
label variable p1_3_s3        "Changing contributions frequency"
label variable p1_4_s3        "Changing expected retirement age"
label variable p1_5_s3        "Informing more about the system"
label variable p2_s3          "During the last year:"
label variable p2_1_s3_afilia "Affiliated to AFP"
label variable p2_1_s3_desafi "Disaffiliated from AFP"
label variable p2_1_s3        "Affiliated to an AFP"
label variable p2_2_s3        "Initialized/increased voluntary savings"
label variable p2_3_s3        "Changed contributions frequency"
label variable p2_4_s3        "Changed expected retirement age"
label variable p2_5_s3        "Informed more about the system"
* Helper flags: their labels are cleared.
label variable condicion_1    ""
label variable condicion_2    ""
label variable p3_s3          "What motivated this changes"
label variable p3_1_s3        "Information received in the module"
label variable p3_2_s3        "Information from other source"
label variable p3_3_s3        "Motivation from close people"
label variable p3_4_sa        "Personal motivation"
label variable p3_4_s3        "Other"
label variable p3_4_1_s3      ""
label variable p4_1_s3        "Forgot about it or did not have time"
label variable p4_2_s3        "Not convenient"
label variable p4_3_s3        "Not enough money"
label variable p4_4_s3        "Does not know how to do it"
label variable p4_5_s3        "Does not depend on them"
label variable p4_6_s3        "Does not know/Does not answer"
label variable p5_s3          "Discussed pension with close people"

* Labels, section 4 (variables ending in _s4).
* NOTE(review): p1_1_s4 is labelled again further down in this file
* ("Employed"), which overrides the label set here -- confirm which is intended.
label variable p1_1_s4      "Independent"
label variable p1_2_s4      "Dependent"
label variable p1_3_s4      "Looking for job"
label variable p1_4_s4      "Studying"
label variable p1_5_s4      "Retired"
label variable p1_6_s4      "Retired with pension"
label variable p1_7_s4      "Inactive"
label variable p1_8_s4      "Does not know"
label variable p2_s4        "Income from main occupation"
label variable p3_s4        "Additional income"
label variable p4_s4        "Working with contract"
label variable p5_s4_fonasa "Public health insurance"
label variable p5_s4_isapre "Private health insurance"
label variable p5_s4_salud  "Health insurance (public or private)"

* Labels, section 5 (variables ending in _s5).
label variable p1_s5         "Has other savings for retirement"
label variable ln_p2_s5      "Savings outside the system (log)"
label variable p3_1_s5       "APV"
label variable p3_2_s5       "Bank account"
label variable p3_3_s5       "Fixed-term deposit"
label variable p3_4_s5       "Mutual funds"
label variable p3_5_s5       "Stocks"
label variable p3_6_s5       "Real estate"
label variable p3_7_s5       "Other goods (cars, etc.)"
label variable p3_8_s5       "Others"
label variable p4_s5         "Savings last year"
label variable ln_p4_s5      "Savings last year (log)"
label variable p5_s5_1       "System's pension very important"
label variable p5_s5_2       "System's pension important"
label variable p5_s5_3       "System's pension not very important"
label variable p5_s5_4       "System's pension not important"
label variable p5_s5_import  "System's pension important (1-2)"
label variable p5_s5_pocoimp "System's pension not important (3-4)"
label variable p6_1_s5       "Pension and government transfers"
label variable p6_2_s5       "Pension and complementary sources"
label variable p6_3_s5       "Not clear"
label variable p7_1_s5       "Other savings"
label variable p7_2_s5       "Keep working"
label variable p7_3_s5       "Family help"
label variable p7_4_s5       "Real estate"
label variable p7_5_s5       "Other"

* Derived variables; this sets the final label of p1_1_s4.
label variable WorkingSEG    "Working"
label variable p1_1_s4       "Employed"
label variable SavingsRate   "Savings Rate"

* Labels, section 6 (variables ending in _s6).
label variable p1_s6         "AFP qualification (1-7)"
label variable p2_s6         "Pension is an adequate retribution (0-1)"
label variable p3_s6         "Trust in the system (1-7)"

* Log wage (shifted by one) and wage deciles.
generate ln_rem_actual = ln(rem_actual + 1)
xtile I_rem_actual = rem_actual, nq(10)

* Log of the first simulated pension (shifted by one).
generate ln_res_sim_1 = ln(res_sim_1 + 1)

* Store the prepared follow-up survey.
save "data/Encuesta_SEG_Preparada", replace
